Data Preprocessing:¶Data visualisation:¶Data Preprocessing:¶Cleaning data in the dataset that may affect machine learning.
Machine Learning:¶Predicting business type information corresponding to property data in 2026 through machine learning.
Import the required library
# Data manipulation
import pandas as pd
import numpy as np
# Geo data handling
import geopandas as gpd
from shapely.geometry import Polygon, MultiPolygon
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import MarkerCluster
import branca.colormap as cm
# Importing data from web
import requests
import json
import io
from io import BytesIO
from zipfile import ZipFile
import tempfile
# Machine learning preprocessing and evaluation
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
# Set matplotlib to display inline
%matplotlib inline
Get data from the City of Melbourne's open data platform
# Define a function to download and read a CSV file.
def download_and_load_csv(url):
response = requests.get(url)
csv_string = response.content.decode('utf-8')
df = pd.read_csv(io.StringIO(csv_string))
return df
# API Link
download_link_1 = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/property-level-energy-consumption-modelled-on-building-attributes-baseline-2011-/exports/csv?lang=en&timezone=Australia%2FSydney&use_labels=true&delimiter=%2C'
download_link_2 = 'https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/business-establishments-with-address-and-industry-classification/exports/csv?lang=en&timezone=Australia%2FSydney&use_labels=true&delimiter=%2C'
# Use functions to download and load data
energy_df = download_and_load_csv(download_link_1)
business_df = download_and_load_csv(download_link_2)
Merging data
# Filter for Census years 2011, 2016, and 2021, and drop other columns
business_df1 = business_df[business_df['Census year'].isin([2011, 2016, 2021])].drop(columns=['Base property ID', 'Longitude', 'Latitude'])
energy_df1 = energy_df[['Geo Point', 'Geo Shape', 'total_2011', 'total_2016', 'total_2021', 'total_2026', 'property_id', 'floor_area']]
# Do not merge if null value exists
merged_df = pd.merge(left=business_df1, right=energy_df1, left_on='Property ID', right_on='property_id', how='inner')
merged_df
| Census year | Block ID | Property ID | CLUE small area | Trading name | Business address | Industry (ANZSIC4) code | Industry (ANZSIC4) description | Geo Point | Geo Shape | total_2011 | total_2016 | total_2021 | total_2026 | property_id | floor_area | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 38 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | {"coordinates": [[[[144.9576429968324, -37.820... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 1 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 62 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | {"coordinates": [[[[144.9576429968324, -37.820... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 2 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 56 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | {"coordinates": [[[[144.9576429968324, -37.820... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 3 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 68 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | {"coordinates": [[[[144.9576429968324, -37.820... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 4 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 8 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | {"coordinates": [[[[144.9576429968324, -37.820... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49457 | 2021 | 361 | 106990 | North Melbourne | O'Connell Residence | 15-19 O'Connell Street NORTH MELBOURNE VIC 3051 | 4400 | Accommodation | -37.80543035745161, 144.9578299828369 | {"coordinates": [[[[144.95771458474408, -37.80... | 332.873321 | 344.929276 | 357.300178 | 369.713308 | 106990.0 | 0.00 |
| 49458 | 2021 | 418 | 104738 | West Melbourne (Residential) | Tricycle Developments Pty Ltd | 187-189 Hawke Street WEST MELBOURNE VIC 3003 | 6923 | Engineering Design and Engineering Consulting ... | -37.80922785700179, 144.94505794304052 | {"coordinates": [[[[144.94517507713024, -37.80... | 7.043330 | 28.864419 | 32.899912 | 37.672850 | 104738.0 | 219.70 |
| 49459 | 2021 | 655 | 103971 | East Melbourne | Vacant | 295-357 Wellington Parade South MELBOURNE VIC ... | 0 | Vacant Space | -37.81632322339168, 144.9747363042855 | {"coordinates": [[[[144.97613572056335, -37.81... | 0.000006 | 0.000000 | 0.000000 | 0.000000 | 103971.0 | 0.00 |
| 49460 | 2021 | 606 | 110016 | East Melbourne | BeetBox | 10 Wellington Parade EAST MELBOURNE VIC 3002 | 6924 | Other Specialised Design Services | -37.8166280648155, 144.98963209678806 | {"coordinates": [[[[144.98968675109631, -37.81... | 17.731589 | 57.688100 | 66.677178 | 76.374827 | 110016.0 | 261.90 |
| 49461 | 2021 | 612 | 109621 | East Melbourne | East Melbourne Orthopaedic Clinic | 520 Victoria Parade EAST MELBOURNE VIC 3002 | 8512 | Specialist Medical Services | -37.81000930287354, 144.98862165463927 | {"coordinates": [[[[144.98861194795919, -37.80... | 4.253241 | 19.107383 | 21.902796 | 24.884924 | 109621.0 | 202.67 |
49462 rows × 16 columns
# Checking the results of data merging
unique_years = merged_df['Census year'].unique()
print(unique_years)
[2021 2016 2011]
Processing geolocation information in data
# Preparing the energy consumption datasets
def get_coords(row):
# Parse the 'Geo Shape' data from a JSON string to a Python dictionary
geo_shape = json.loads(row['geometry'])
# Check the geometry type 'MultiPolygon'
if geo_shape['type'] == 'MultiPolygon':
# Extract coordinates for MultiPolygon
coordinates = geo_shape['coordinates']
polygons = [Polygon(p[0]) for p in coordinates] # Extract each polygon
row["geometry"] = MultiPolygon(polygons)
else:
row["geometry"] = None # Handle other types or malformed data
return row
def prepare_dataset(dataframe):
# Create a new DataFrame to store the modified data
new_dataframe = dataframe.copy()
# Rename columns
new_dataframe = new_dataframe.rename(columns={"Geo Shape" : "geometry"})
# Apply the get_coords function and update the 'geometry' column
new_dataframe['geometry'] = new_dataframe.apply(get_coords, axis=1)['geometry']
# Convert numeric columns
new_dataframe["total_2011"] = pd.to_numeric(new_dataframe['total_2011'], errors='coerce')
new_dataframe["total_2016"] = pd.to_numeric(new_dataframe['total_2016'], errors='coerce')
new_dataframe["total_2021"] = pd.to_numeric(new_dataframe['total_2021'], errors='coerce')
new_dataframe["total_2026"] = pd.to_numeric(new_dataframe['total_2026'], errors='coerce')
new_dataframe["floor_area"] = pd.to_numeric(new_dataframe['floor_area'], errors='coerce')
# Convert the DataFrame to a GeoDataFrame
return gpd.GeoDataFrame(new_dataframe, geometry='geometry', crs=4326)
model_gdf_p = prepare_dataset(merged_df)
model_gdf_p
| Census year | Block ID | Property ID | CLUE small area | Trading name | Business address | Industry (ANZSIC4) code | Industry (ANZSIC4) description | Geo Point | geometry | total_2011 | total_2016 | total_2021 | total_2026 | property_id | floor_area | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 38 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 1 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 62 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 2 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 56 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 3 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 68 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| 4 | 2021 | 1 | 108843 | Melbourne (CBD) | Vacant | 8 Rebecca Walk MELBOURNE VIC 3000 | 0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 129.723347 | 188.072472 | 243.024706 | 296.334736 | 108843.0 | 0.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49457 | 2021 | 361 | 106990 | North Melbourne | O'Connell Residence | 15-19 O'Connell Street NORTH MELBOURNE VIC 3051 | 4400 | Accommodation | -37.80543035745161, 144.9578299828369 | MULTIPOLYGON (((144.95771 -37.80555, 144.95768... | 332.873321 | 344.929276 | 357.300178 | 369.713308 | 106990.0 | 0.00 |
| 49458 | 2021 | 418 | 104738 | West Melbourne (Residential) | Tricycle Developments Pty Ltd | 187-189 Hawke Street WEST MELBOURNE VIC 3003 | 6923 | Engineering Design and Engineering Consulting ... | -37.80922785700179, 144.94505794304052 | MULTIPOLYGON (((144.94518 -37.80926, 144.94519... | 7.043330 | 28.864419 | 32.899912 | 37.672850 | 104738.0 | 219.70 |
| 49459 | 2021 | 655 | 103971 | East Melbourne | Vacant | 295-357 Wellington Parade South MELBOURNE VIC ... | 0 | Vacant Space | -37.81632322339168, 144.9747363042855 | MULTIPOLYGON (((144.97614 -37.81671, 144.97611... | 0.000006 | 0.000000 | 0.000000 | 0.000000 | 103971.0 | 0.00 |
| 49460 | 2021 | 606 | 110016 | East Melbourne | BeetBox | 10 Wellington Parade EAST MELBOURNE VIC 3002 | 6924 | Other Specialised Design Services | -37.8166280648155, 144.98963209678806 | MULTIPOLYGON (((144.98969 -37.81652, 144.98968... | 17.731589 | 57.688100 | 66.677178 | 76.374827 | 110016.0 | 261.90 |
| 49461 | 2021 | 612 | 109621 | East Melbourne | East Melbourne Orthopaedic Clinic | 520 Victoria Parade EAST MELBOURNE VIC 3002 | 8512 | Specialist Medical Services | -37.81000930287354, 144.98862165463927 | MULTIPOLYGON (((144.98861 -37.80990, 144.98867... | 4.253241 | 19.107383 | 21.902796 | 24.884924 | 109621.0 | 202.67 |
49462 rows × 16 columns
Split the data again for outlier processing.
# Split data by Census year
df_2011 = model_gdf_p[model_gdf_p['Census year'] == 2011].drop(columns=['total_2016', 'total_2021', 'total_2026'])
df_2016 = model_gdf_p[model_gdf_p['Census year'] == 2016].drop(columns=['total_2011', 'total_2021', 'total_2026'])
df_2021 = model_gdf_p[model_gdf_p['Census year'] == 2021].drop(columns=['total_2011', 'total_2016', 'total_2026'])
df_2021_l = model_gdf_p[model_gdf_p['Census year'] == 2021].drop(columns=['total_2011', 'total_2016', 'total_2021'])
def update_total_by_code_count(df, total_column):
"""
Updates the total column of the Dataset based on the number of unique Industry associated with
each Property ID.
Parameters:
- df: DataFrame containing the data.
- total_column: str, the name of the column containing total values that need to be updated.
Returns:
- df: DataFrame with the updated total column.
"""
# Calculate the number of Industry (ANZSIC4) codes corresponding to each Property ID
code_counts = df.groupby('Property ID')['Industry (ANZSIC4) code'].transform('nunique')
# Updates the total column
df[total_column] = df[total_column] / code_counts
return df
# Apply the function to update each DataFrame
df_2011 = update_total_by_code_count(df_2011, 'total_2011')
df_2016 = update_total_by_code_count(df_2016, 'total_2016')
df_2021 = update_total_by_code_count(df_2021, 'total_2021')
df_2021_l = update_total_by_code_count(df_2021_l, 'total_2026')
df_2026 = df_2021_l.copy().assign(**{df_2021_l.columns[0]: 2026})
Clean data to match data set description.
columns_to_clear = ['Trading name', 'Business address',
'Industry (ANZSIC4) code',
'Industry (ANZSIC4) description']
df_2026[columns_to_clear] = np.nan
df_2026
| Census year | Block ID | Property ID | CLUE small area | Trading name | Business address | Industry (ANZSIC4) code | Industry (ANZSIC4) description | Geo Point | geometry | total_2026 | property_id | floor_area | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2026 | 1 | 108843 | Melbourne (CBD) | NaN | NaN | NaN | NaN | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 59.266947 | 108843.0 | 0.00 |
| 1 | 2026 | 1 | 108843 | Melbourne (CBD) | NaN | NaN | NaN | NaN | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 59.266947 | 108843.0 | 0.00 |
| 2 | 2026 | 1 | 108843 | Melbourne (CBD) | NaN | NaN | NaN | NaN | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 59.266947 | 108843.0 | 0.00 |
| 3 | 2026 | 1 | 108843 | Melbourne (CBD) | NaN | NaN | NaN | NaN | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 59.266947 | 108843.0 | 0.00 |
| 4 | 2026 | 1 | 108843 | Melbourne (CBD) | NaN | NaN | NaN | NaN | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 59.266947 | 108843.0 | 0.00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49457 | 2026 | 361 | 106990 | North Melbourne | NaN | NaN | NaN | NaN | -37.80543035745161, 144.9578299828369 | MULTIPOLYGON (((144.95771 -37.80555, 144.95768... | 369.713308 | 106990.0 | 0.00 |
| 49458 | 2026 | 418 | 104738 | West Melbourne (Residential) | NaN | NaN | NaN | NaN | -37.80922785700179, 144.94505794304052 | MULTIPOLYGON (((144.94518 -37.80926, 144.94519... | 37.672850 | 104738.0 | 219.70 |
| 49459 | 2026 | 655 | 103971 | East Melbourne | NaN | NaN | NaN | NaN | -37.81632322339168, 144.9747363042855 | MULTIPOLYGON (((144.97614 -37.81671, 144.97611... | 0.000000 | 103971.0 | 0.00 |
| 49460 | 2026 | 606 | 110016 | East Melbourne | NaN | NaN | NaN | NaN | -37.8166280648155, 144.98963209678806 | MULTIPOLYGON (((144.98969 -37.81652, 144.98968... | 76.374827 | 110016.0 | 261.90 |
| 49461 | 2026 | 612 | 109621 | East Melbourne | NaN | NaN | NaN | NaN | -37.81000930287354, 144.98862165463927 | MULTIPOLYGON (((144.98861 -37.80990, 144.98867... | 24.884924 | 109621.0 | 202.67 |
16516 rows × 13 columns
Visualisation of outliers via box plots
def plot_energyconsumption_by_area_per_year(dfs, years):
"""
Plots individual boxplots of energy consumption for different areas, each plot corresponding to each year.
Parameters:
- dfs: list of DataFrames for each year.
- years: list of years corresponding to each DataFrame.
"""
for df, year in zip(dfs, years):
# Prepare data for plotting
df = df.copy()
total_column = f'total_{year}' # Adjust based on the provided structure
# Check if the expected total column exists in the DataFrame
if total_column not in df.columns:
print(f"Column {total_column} does not exist in the DataFrame for the year {year}.")
continue
df['Year'] = year
df.rename(columns={total_column: 'Total Energy Consumption'}, inplace=True)
plot_data = df[['CLUE small area', 'Total Energy Consumption']]
# New figure for each year
plt.figure(figsize=(10, 6))
sns.boxplot(x='CLUE small area', y='Total Energy Consumption', data=plot_data)
plt.title(f'Energy Consumption by Area in {year}')
plt.xlabel('CLUE Small Area')
plt.ylabel('Total Energy Consumption')
# Rotate x-axis labels for better readability
plt.xticks(rotation=90)
plt.show()
# Plotting the boxplot
plot_energyconsumption_by_area_per_year([df_2011, df_2016, df_2021, df_2026], [2011, 2016, 2021, 2026])
Remove outliers and visualise results
def remove_outliers(df, year):
"""
Removes outliers from a DataFrame based on the IQR method.
Parameters:
df: DataFrame containing the data.
column_name: str, the name of the column from which need to remove outliers.
Returns:
df: DataFrame with outliers removed.
"""
total_column = f'total_{year}'
Q1 = df[total_column].quantile(0.25)
Q3 = df[total_column].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# Return the DataFrame without outliers
return df[(df[total_column] >= lower_bound) & (df[total_column] <= upper_bound)]
df_2011 = remove_outliers(df_2011, '2011')
df_2016 = remove_outliers(df_2016, '2016')
df_2021 = remove_outliers(df_2021, '2021')
df_2026 = remove_outliers(df_2026, '2026')
plot_energyconsumption_by_area_per_year([df_2011, df_2016, df_2021, df_2026], [2011, 2016, 2021, 2026])
def remove_zero(df, year):
"""
Removes all rows belonging to industries whose total sum for the specified year is zero.
For the year 2026, it ignores the industry code totals.
Parameters:
df : DataFrame containing the data.
year : str, The year column suffix in the DataFrame to identify the total column.
Returns:
df : DataFrame with zero total industries removed.
"""
total_column = f'total_{year}'
# Total energy consumption by Property ID
property_totals = df.groupby('Property ID')[total_column].sum()
# Property ID with extremely low energy consumption
zero_total_properties = property_totals[property_totals == 0.0].index.tolist()
# Delete corresponding data
df = df[~df['Property ID'].isin(zero_total_properties)]
# Delete of industries with extremely low energy consumption by industry code
if year != '2026':
industry_totals = df.groupby('Industry (ANZSIC4) code')[total_column].sum()
zero_total_industries = industry_totals[industry_totals <= 5].index.tolist()
df = df[~df['Industry (ANZSIC4) code'].isin(zero_total_industries)]
return df
# Modifying Data with Functions
df_2011 = remove_zero(df_2011, '2011')
df_2016 = remove_zero(df_2016, '2016')
df_2021 = remove_zero(df_2021, '2021')
df_2026 = remove_zero(df_2026, '2026')
Total energy consumption in different sectors through bar charts
def plot_top_bottom_industries(df, year, n=10):
"""
Plots top and bottom n industries by total for a given year, with labels on each bar,
only if the total sum is not zero.
Parameters:
df : DataFrame containing the data.
year : str, The year column suffix in the DataFrame to identify the total column.
n : int - Number of top and bottom industries to display.
"""
# Grouping by industry description and calculating the sum for the corresponding year
total_column = f'total_{year}'
industry_totals = df.groupby('Industry (ANZSIC4) description')[total_column].sum()
# Select the highest and lowest industries
top_industries = industry_totals.nlargest(n)
bottom_industries = industry_totals.nsmallest(n)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 10))
# Plotting the highest energy-consuming industries
top_industries.plot(kind='bar', color='skyblue', ax=ax1)
ax1.set_title(f'Top {n} Industries by Total in {year}')
ax1.set_xlabel('Industry (ANZSIC4) Description')
ax1.set_ylabel(f'Total {year}')
ax1.tick_params(axis='x', rotation=90)
# Adding numeric labels to bar charts
for p in ax1.patches:
ax1.annotate(f'{p.get_height():,.0f}', (p.get_x() + p.get_width() / 2., p.get_height()),
ha='center', va='center', xytext=(0, 10), textcoords='offset points')
# Plotting the lowest energy-consuming industries
bottom_industries.plot(kind='bar', color='salmon', ax=ax2)
ax2.set_title(f'Bottom {n} Industries by Total in {year}')
ax2.set_xlabel('Industry (ANZSIC4) Description')
ax2.set_ylabel(f'Total {year}')
ax2.tick_params(axis='x', rotation=90)
# Adding numeric labels to bar charts
for p in ax2.patches:
ax2.annotate(f'{p.get_height():,.0f}', (p.get_x() + p.get_width() / 2., p.get_height()),
ha='center', va='center', xytext=(0, 10), textcoords='offset points')
plt.tight_layout()
plt.show()
plot_top_bottom_industries(df_2011, '2011')
plot_top_bottom_industries(df_2016, '2016')
plot_top_bottom_industries(df_2021, '2021')
Result Analysis
There are a large number of vacant areas in Melbourne's city area that still consume a large amount of energy and cause energy wastage.
# Standardization of the energy-consumption columns
df_2011['Energy Consumption'] = df_2011['total_2011']
df_2016['Energy Consumption'] = df_2016['total_2016']
df_2021['Energy Consumption'] = df_2021['total_2021']
df_2026['Energy Consumption'] = df_2026['total_2026']
# Merge datasets
frames = [df_2011, df_2016, df_2021, df_2026]
model_gdf = pd.concat(frames)
model_gdf
| Census year | Block ID | Property ID | CLUE small area | Trading name | Business address | Industry (ANZSIC4) code | Industry (ANZSIC4) description | Geo Point | geometry | total_2011 | property_id | floor_area | Energy Consumption | total_2016 | total_2021 | total_2026 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 15 | 2011 | 1 | 108843 | Melbourne (CBD) | Citipower Pty | Substation 142, Rear 581 Spencer Street MELBOU... | 2630.0 | Electricity Distribution | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 11.793032 | 108843.0 | 0.00 | 11.793032 | NaN | NaN | NaN |
| 16 | 2011 | 1 | 108843 | Melbourne (CBD) | Foot & Body (8) | 8 Spencer Street MELBOURNE 3000 | 9511.0 | Hairdressing and Beauty Services | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 11.793032 | 108843.0 | 0.00 | 11.793032 | NaN | NaN | NaN |
| 17 | 2011 | 1 | 108843 | Melbourne (CBD) | The Clothesline Collective | 62 Spencer Street MELBOURNE 3000 | 4255.0 | Womens Clothing Retailing | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 11.793032 | 108843.0 | 0.00 | 11.793032 | NaN | NaN | NaN |
| 18 | 2011 | 1 | 108843 | Melbourne (CBD) | Vacant | 44 Spencer Street MELBOURNE 3000 | 0.0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 11.793032 | 108843.0 | 0.00 | 11.793032 | NaN | NaN | NaN |
| 19 | 2011 | 1 | 108843 | Melbourne (CBD) | Vacant | 2 Spencer Street MELBOURNE 3000 | 0.0 | Vacant Space | -37.82161126424501, 144.95659341052604 | MULTIPOLYGON (((144.95764 -37.82094, 144.95771... | 11.793032 | 108843.0 | 0.00 | 11.793032 | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 49456 | 2026 | 357 | 105035 | North Melbourne | NaN | NaN | NaN | NaN | -37.80436576436303, 144.95418669327805 | MULTIPOLYGON (((144.95430 -37.80433, 144.95428... | NaN | 105035.0 | 730.80 | 475.503940 | NaN | NaN | 475.503940 |
| 49457 | 2026 | 361 | 106990 | North Melbourne | NaN | NaN | NaN | NaN | -37.80543035745161, 144.9578299828369 | MULTIPOLYGON (((144.95771 -37.80555, 144.95768... | NaN | 106990.0 | 0.00 | 369.713308 | NaN | NaN | 369.713308 |
| 49458 | 2026 | 418 | 104738 | West Melbourne (Residential) | NaN | NaN | NaN | NaN | -37.80922785700179, 144.94505794304052 | MULTIPOLYGON (((144.94518 -37.80926, 144.94519... | NaN | 104738.0 | 219.70 | 37.672850 | NaN | NaN | 37.672850 |
| 49460 | 2026 | 606 | 110016 | East Melbourne | NaN | NaN | NaN | NaN | -37.8166280648155, 144.98963209678806 | MULTIPOLYGON (((144.98969 -37.81652, 144.98968... | NaN | 110016.0 | 261.90 | 76.374827 | NaN | NaN | 76.374827 |
| 49461 | 2026 | 612 | 109621 | East Melbourne | NaN | NaN | NaN | NaN | -37.81000930287354, 144.98862165463927 | MULTIPOLYGON (((144.98861 -37.80990, 144.98867... | NaN | 109621.0 | 202.67 | 24.884924 | NaN | NaN | 24.884924 |
52820 rows × 17 columns
Via an interactive map that shows the number of trades for different property IDs in different years.
# Convert DataFrame to GeoDataFrame
gdf = gpd.GeoDataFrame(model_gdf, geometry=gpd.points_from_xy(model_gdf['Geo Point'].apply(
lambda x: float(x.split(',')[1])), model_gdf['Geo Point'].apply(lambda x: float(x.split(',')[0]))))
# Calculate the number of trades per property ID per year
gdf['industry_count'] = gdf.groupby(['Property ID', 'Census year'])['Industry (ANZSIC4) code'].transform('nunique')
# Setting the map plotting function
def plot_industry_count_by_year_interactive():
# Initialise map
m = folium.Map(
location=[-37.8136, 144.9631],
zoom_start=12)
layers = {}
# Processing each year
for year in [2011, 2016, 2021]:
marker_cluster = MarkerCluster(name=str(year), show=False)
# Add markers to the map
for _, row in gdf[gdf['Census year'] == year].iterrows():
folium.Marker(
location=[row.geometry.y, row.geometry.x],
icon=folium.DivIcon(html=f"""
<div style="font-family: 'Arial'; color: 'blue'; font-size: 12pt">
{row['industry_count']}</div>""")
).add_to(marker_cluster)
marker_cluster.add_to(m)
layers[str(year)] = marker_cluster
# Initial visibility of first layer
layers['2011'].show = True
folium.LayerControl().add_to(m)
return m
# Display interactive map
map_display = plot_industry_count_by_year_interactive()
map_display